一言不合就学R

商务大数据智能分析R3

Lecturer : 申 旌 周
Instructor : 常 象 宇

2017年3月19日

概览

- 函数

- 作图

函数

  • 定义
  • 参数
  • 封装

函数 - 定义

# Summarise a numeric vector.
#
# x: a numeric vector.
# Returns a named list with three elements:
#   mean      - the result of mean(x)
#   sd        - the result of sd(x)
#   quantiles - the result of quantile(x) with its default probabilities
rsummary <- function(x) {
  center <- mean(x)
  spread <- sd(x)
  cut_points <- quantile(x)
  # The last expression is the function's value; no explicit return() needed.
  list(mean = center, sd = spread, quantiles = cut_points)
}
x <- rnorm(100, mean = 6, sd = 1)
rsummary(x)
$mean
[1] 6.1026

$sd
[1] 1.163002

$quantiles
      0%      25%      50%      75%     100% 
3.057118 5.214474 6.031743 6.888774 8.898776 

函数 - 定义

# Convert a single raw score into a grade label.
#
# x:    a single numeric raw score.
# base: points added to x before grading (default 59).
#
# Every threshold is applied to the adjusted score s = x + base:
#   s >= 90 -> "优", s >= 80 -> "良", s >= 70 -> "中", s >= 60 -> "及格",
#   anything lower -> "仍需努力".
# Returns the grade as a length-one character vector.
calGrade <- function(x, base = 59) {
  s <- x + base
  if (s >= 90) {
    grade <- "优"
  } else if (s >= 80) {
    grade <- "良"
  } else if (s >= 70) {
    grade <- "中"
  } else if (s >= 60) {
    # Compare the adjusted score here as well; testing the raw x made
    # this branch unreachable for any x below 60.
    grade <- "及格"
  } else {
    grade <- "仍需努力"
  }
  grade
}
scores <- c(40, 20, 27, 31, 0, 0)
lapply(scores, calGrade)
[[1]]
[1] "优"

[[2]]
[1] "中"

[[3]]
[1] "良"

[[4]]
[1] "优"

[[5]]
[1] "仍需努力"

[[6]]
[1] "仍需努力"

函数 - 参数

  • formals
formals(calGrade)
$x


$base
[1] 59
formals(plot)
$x


$y


$...
  • 调用时参数可以缺失;定义时参数可以带缺省值

函数 - 参数

  • 参数匹配
(M <- matrix(1:6, nrow = 2)) # 精确匹配
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
(M <- matrix(1:6, nr = 2)) # 部分匹配
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6
(M <- matrix(1:6, 2)) # 位置匹配
     [,1] [,2] [,3]
[1,]    1    3    5
[2,]    2    4    6

函数 - 参数

  • ...

quantile(x, probs = seq(0, 1, 0.25), ...)
apply(X, MARGIN, FUN, ...)

set.seed(1)
(x <- matrix(rnorm(12), 3)) 
           [,1]       [,2]      [,3]       [,4]
[1,] -0.6264538  1.5952808 0.4874291 -0.3053884
[2,]  0.1836433  0.3295078 0.7383247  1.5117812
[3,] -0.8356286 -0.8204684 0.5757814  0.3898432
apply(x, 2, quantile)  # matrix
           [,1]       [,2]      [,3]        [,4]
0%   -0.8356286 -0.8204684 0.4874291 -0.30538839
25%  -0.7310412 -0.2454803 0.5316052  0.04222742
50%  -0.6264538  0.3295078 0.5757814  0.38984324
75%  -0.2214052  0.9623943 0.6570530  0.95081220
100%  0.1836433  1.5952808 0.7383247  1.51178117

函数 - 参数

  • ...

quantile(x, probs = seq(0, 1, 0.25), ...)
apply(X, MARGIN, FUN, ...)

set.seed(1)
(x <- matrix(rnorm(12), 3))
           [,1]       [,2]      [,3]       [,4]
[1,] -0.6264538  1.5952808 0.4874291 -0.3053884
[2,]  0.1836433  0.3295078 0.7383247  1.5117812
[3,] -0.8356286 -0.8204684 0.5757814  0.3898432
apply(x, 2, quantile, probs = c(.25, .75))
          [,1]       [,2]      [,3]       [,4]
25% -0.7310412 -0.2454803 0.5316052 0.04222742
75% -0.2214052  0.9623943 0.6570530 0.95081220

函数 - 参数

  • ...
str(paste)
function (..., sep = " ", collapse = NULL)  
  • GOOD
paste("Hi", " There", sep = "!")
[1] "Hi! There"
  • BAD
paste("Hi", " There", se = "!")
[1] "Hi  There !"

函数 - 参数

  • ...
x <- 1:30
plot(x, log(x))

plot of chunk unnamed-chunk-16

  • 自定义函数
# Wrapper around plot() whose `type` argument defaults to "l".
#
# x, y: passed to plot() as its x and y arguments.
# type: passed to plot() as `type`; defaults to "l".
# ...:  any further arguments are forwarded unchanged to plot().
my.plot <- function(x, y, type = "l", ...) {
  plot(x = x, y = y, type = type, ...)
}
my.plot(x, log(x))

plot of chunk unnamed-chunk-18

函数 - 封装

  • Obsessive Compulsive Disorder, OCD

  • 主文件调用函数文件
source("./MyPlotEncap.R")  
my.plot.encap(x, log(x))

plot of chunk unnamed-chunk-19

概览

- 函数

- 作图

作图

  • plot
  • ggplot2
  • 其它

plot

  • 数据
  • 单变量作图
  • 多变量作图
  • 存储

plot - 数据

library(MASS)
str(birthwt)
'data.frame':   189 obs. of  10 variables:
 $ low  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ age  : int  19 33 20 21 18 21 22 17 29 26 ...
 $ lwt  : int  182 155 105 108 107 124 118 103 123 113 ...
 $ race : int  2 3 1 1 1 3 1 3 1 1 ...
 $ smoke: int  0 0 1 1 1 0 0 0 1 1 ...
 $ ptl  : int  0 0 0 0 0 0 0 0 0 0 ...
 $ ht   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ ui   : int  1 0 0 1 1 0 0 0 0 0 ...
 $ ftv  : int  0 3 1 2 0 0 1 1 1 0 ...
 $ bwt  : int  2523 2551 2557 2594 2600 2622 2637 2637 2663 2665 ...

plot - 数据

head(birthwt, 4)
   low age lwt race smoke ptl ht ui ftv  bwt
85   0  19 182    2     0   0  0  1   0 2523
86   0  33 155    3     0   0  0  0   3 2551
87   0  20 105    1     1   0  0  0   1 2557
88   0  21 108    1     1   0  0  1   2 2594
colnames(birthwt) <- 
  c("birthwt.below.2500", 
    "mother.age", "mother.weight",  
    "race", "mother.smokes", "previous.prem.labor",
    "hypertension", "uterine.irr", "physician.visits", 
    "birthwt.grams")
birthwt$mother.smokes <- as.factor(birthwt$mother.smokes)
levels(birthwt$mother.smokes)
[1] "0" "1"

plot - 单变量

plot(birthwt$mother.age)

plot of chunk unnamed-chunk-25

hist(birthwt$mother.age)

plot of chunk unnamed-chunk-26

plot - 单变量

plot(birthwt$mother.age)

plot of chunk unnamed-chunk-27

with(birthwt, plot (mother.age, birthwt.grams, 
xlab = "孕期妈妈年龄", 
ylab = "新生儿体重(克)",
col = mother.smokes,
pch = 19, # 实心点
cex = 0.7)) # 点的大小
abline(h = 2500)
legend("bottomright", c("不抽烟","抽烟"), col=c(1,2), pch=19)

plot of chunk unnamed-chunk-28

plot - 单变量

plot(birthwt$mother.smokes)

plot of chunk unnamed-chunk-29

  • plot(x, ...) 的作图行为因 class(x) 而变
plot(birthwt$mother.smokes, 
     main = "孕期妈妈抽烟分布图", 
     xlab = "孕期妈妈是否抽烟", 
     ylab = "数量",
     col = "lightblue")

plot of chunk unnamed-chunk-30

plot - 多变量

par(mfrow = c(1, 1))
with(birthwt, 
     plot(mother.smokes, 
          birthwt.grams, 
     xlab = "孕期妈妈是否抽烟", 
     ylab = "新生儿体重(克)"))

plot of chunk unnamed-chunk-31

plot - 多变量

with(birthwt, 
     plot(physician.visits, 
          birthwt.grams,
     xlab = "就诊数量", 
     ylab = "新生儿体重(克)",
     col = 'lightblue'))

plot of chunk unnamed-chunk-32

with(birthwt,      plot(as.factor(physician.visits),     birthwt.grams,
     xlab = "就诊数量", 
     ylab = "新生儿体重(克)",
     col = 'lightblue'))

plot of chunk unnamed-chunk-33

plot - 存储

  • png
# Open a PNG graphics device, draw the plot into it, then close the device
# with dev.off(). The relative path "scatter.png" is resolved against the
# current working directory.
# `filename` is spelled out in full rather than relying on partial matching
# of `file`.
png(filename = "scatter.png", bg = "white", res = 120)
with(birthwt,
     plot(as.factor(physician.visits),
          birthwt.grams,
          xlab = "就诊数量",
          ylab = "新生儿体重(克)",
          col = "lightblue"))
dev.off()
  • pdf
  • jpeg
  • 工作目录
    setwd()
    getwd()

作图

  • plot
  • ggplot2
  • 其它

作图 - 其它

Thank you